import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
# Load the raw customer table. The code below reads the 'recency',
# 'frequency' and 'total_charges' columns from it.
data = pd.read_csv("data_mete.csv", delimiter=",")
data.shape

# Log-transform the three RFM inputs.
# NOTE(review): np.log gives -inf for 0 and NaN for negative values --
# confirm the source columns are strictly positive.
for column in ('recency', 'frequency', 'total_charges'):
    data[column] = np.log(data[column])
data.head()

# Drop the first column and rename the remaining three to RFM names.
# NOTE(review): the first column is presumably an id/index column -- confirm.
data.drop(data.columns[0], axis=1, inplace=True)
data.columns = ['recency', 'frequency', 'monetary']
data.head()

data.isna().sum(axis=0)
# dropna() returns a new frame; the bare call used previously discarded it,
# so rows with missing values were never removed and were later scored by
# the fall-through branches of the scoring functions. Drop them in place
# (same object, matching the in-place drop above).
data.dropna(inplace=True)
data_copy = data.copy()

# 25th/50th/75th percentile of every column, used as quartile cut-offs.
quantiles = data.quantile(q=[0.25, 0.5, 0.75])
quantiles
quantiles.to_dict()
def R_Score(x, p, d):
    """Return the recency quartile score (4 = lowest values, 1 = highest).

    x: the value to score.
    p: column name used to look up the cut-offs ('recency', 'frequency'
       or 'monetary').
    d: mapping such that d[p][q] is the cut-off for q in 0.25, 0.50, 0.75.

    Lower values score higher: a value at or below the 25% cut-off gets 4,
    anything above the 75% cut-off gets 1.
    """
    cutoffs = d[p]
    for score, quantile in ((4, 0.25), (3, 0.50), (2, 0.75)):
        if x <= cutoffs[quantile]:
            return score
    return 1
def FM_Score(x, p, d):
    """Return the frequency/monetary quartile score (1 = lowest, 4 = highest).

    x: the value to score.
    p: column name used to look up the cut-offs ('recency', 'frequency'
       or 'monetary').
    d: mapping such that d[p][q] is the cut-off for q in 0.25, 0.50, 0.75.

    Higher values score higher: a value at or below the 25% cut-off gets 1,
    anything above the 75% cut-off gets 4.
    """
    cutoffs = d[p]
    for score, quantile in ((1, 0.25), (2, 0.50), (3, 0.75)):
        if x <= cutoffs[quantile]:
            return score
    return 4
# rfm_table is an alias of `data` (not a copy): every column added here is
# also visible through `data`.
rfm_table = data

# Quartile score per dimension, using the cut-offs held in `quantiles`.
rfm_table['R_Quartile'] = rfm_table['recency'].apply(
    lambda value: R_Score(value, 'recency', quantiles))
rfm_table['F_Quartile'] = rfm_table['frequency'].apply(
    lambda value: FM_Score(value, 'frequency', quantiles))
rfm_table['M_Quartile'] = rfm_table['monetary'].apply(
    lambda value: FM_Score(value, 'monetary', quantiles))
rfm_table

# Three-character code made of the R, F and M scores, e.g. '444'.
rfm_table['RFMScore'] = (
    rfm_table['R_Quartile'].map(str)
    + rfm_table['F_Quartile'].map(str)
    + rfm_table['M_Quartile'].map(str)
)
rfm_table

# Best customers: top score on all three dimensions, largest monetary first.
is_best = rfm_table['RFMScore'] == '444'
rfm_table[is_best].sort_values('monetary', ascending=False)

# Overall score: the sum of the three quartile scores.
rfm_table['Total Score'] = rfm_table[
    ['R_Quartile', 'F_Quartile', 'M_Quartile']].sum(axis=1)
rfm_table.sort_values('Total Score', ascending=False)
rfm_table[rfm_table['RFMScore'] == '444']
def rfm_level(data):
    """Return the segment name for a row, based on its 'Total Score'.

    data: a row-like object supporting data['Total Score'].

    The score is compared against descending floors; the first floor it
    reaches decides the segment. Scores below 4 (and values that compare
    false against every floor) yield 'Require Activation'.
    """
    score = data['Total Score']
    floors = (
        (9, "Can't Loose Them"),
        (8, 'Champions'),
        (7, 'Loyals'),
        (6, 'Potential'),
        (5, 'Promising'),
        (4, 'Needs Attention'),
    )
    for floor, segment in floors:
        if score >= floor:
            return segment
    return 'Require Activation'
# Label every row with its segment name.
rfm_table['RFM Level'] = rfm_table.apply(lambda row: rfm_level(row), axis=1)
rfm_table
rfm_table["RFM Level"].value_counts()

# Per-segment summary: mean recency/frequency/monetary plus the row count,
# rounded to one decimal.
segment_stats = {
    'recency': 'mean',
    'frequency': 'mean',
    'monetary': ['mean', 'count'],
}
rfm_level_agg = rfm_table.groupby('RFM Level').agg(segment_stats).round(1)
print(rfm_level_agg)
import squarify

# Flatten the (column, statistic) header produced by the aggregation.
rfm_level_agg.columns = ['RecencyMean', 'FrequencyMean', 'MonetaryMean', 'Count']

# Create the plot and resize it.
fig = plt.gcf()
ax = fig.add_subplot()
fig.set_size_inches(16, 9)

# Take the labels from the aggregate's own index so each tile is always
# paired with the segment its size belongs to. The previous hard-coded list
# only lined up when every segment was present, and it spelled one segment
# 'Loyal' while the data carries 'Loyals'.
squarify.plot(sizes=rfm_level_agg['Count'],
              label=rfm_level_agg.index.tolist(),
              alpha=.6)
plt.title("RFM Segments", fontsize=18, fontweight="bold")
plt.axis('off')
plt.show()
# Feature matrix for clustering: the three quartile scores of every row.
x3 = data[['R_Quartile', 'F_Quartile', 'M_Quartile']].values

# Elbow method: fit KMeans for 1..10 clusters and record each inertia.
inertia = []
for n in range(1, 11):
    # `algorithm` is left at the library default. The value used before,
    # 'full', was deprecated in scikit-learn 1.1 and removed in 1.3, where
    # it makes the fit raise. The default is accepted by every release.
    algorithm = KMeans(n_clusters=n, init='k-means++', n_init=10,
                       max_iter=300, tol=0.0001, random_state=111)
    algorithm.fit(x3)
    inertia.append(algorithm.inertia_)

cluster_counts = np.arange(1, 11)
plt.figure(1, figsize=(15, 6))
plt.plot(cluster_counts, inertia, 'o')
plt.plot(cluster_counts, inertia, '-', alpha=0.5)
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.show()
# Number of clusters for the final model.
# NOTE(review): presumably read off the elbow plot -- confirm.
k = 3

# Segment labels as a plain NumPy array (not used by the fit below).
new_data = np.array(data['RFM Level'])
type(new_data)
new_data

# Fit the final model on the quartile-score matrix and keep its outputs.
algorithm = KMeans(
    n_clusters=k,
    init='k-means++',
    n_init=10,
    max_iter=300,
    tol=0.0001,
    random_state=111,
    algorithm='elkan',
)
algorithm.fit(x3)
labels3 = algorithm.labels_
centroids3 = algorithm.cluster_centers_
import plotly.graph_objs as go
import plotly as py
def levels(data):
    """Return the integer code (1-7) for a row's 'RFM Level' segment name.

    data: a row-like object supporting data['RFM Level'].

    Any name not listed below, including 'Require Activation', maps to 7.
    """
    codes = {
        "Can't Loose Them": 1,
        "Champions": 2,
        # The segmentation step emits 'Loyals'. Only 'Loyal' was matched
        # before, so that segment fell through to 7 and became
        # indistinguishable from 'Require Activation'. Both are accepted.
        "Loyals": 3,
        "Loyal": 3,
        "Needs Attention": 4,
        "Potential": 5,
        "Promising": 6,
    }
    return codes.get(data['RFM Level'], 7)
# Replace the segment names with their integer codes so they can drive the
# marker colour, and store the KMeans cluster label next to them.
data["RFM Level"] = data.apply(levels, axis=1)
data['label3'] = labels3

# NOTE(review): the figure is titled 'Clusters' but is coloured by the RFM
# level code, not by 'label3' -- confirm which one is intended.
segment_codes = data['RFM Level']
marker_style = dict(
    color=segment_codes,
    size=1,
    line=dict(color=segment_codes, width=12),
    opacity=0.8,
)
trace1 = go.Scatter3d(
    x=data['recency'],
    y=data['frequency'],
    z=data['monetary'],
    mode='markers',
    marker=marker_style,
)
data_temp = [trace1]

axis_titles = dict(
    xaxis=dict(title='R'),
    yaxis=dict(title='F'),
    zaxis=dict(title='M'),
)
layout = go.Layout(title='Clusters', scene=axis_titles)

fig = go.Figure(data=data_temp, layout=layout)
py.offline.iplot(fig)